1 Summary
- datamodelrを使ってみる
2 Preparations
2.1 libraries
library(dplyr)
library(readr)
library(purrr)
library(knitr)
library(datamodelr)
library(nycflights13)
options(scipen=10, readr.num_columns = 0)
opts_chunk$set(
#out.width = '\\maxwidth'
highlight = T
)2.2 Data Prep
dm_f <- dm_from_data_frames(flights, airlines, weather, airports, planes)2.3 Data Check
map(list(flights, airlines, weather, airports, planes), str)## Classes 'tbl_df', 'tbl' and 'data.frame': 336776 obs. of 19 variables:
## $ year : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int 1 1 1 1 1 1 1 1 1 1 ...
## $ dep_time : int 517 533 542 544 554 554 555 557 557 558 ...
## $ sched_dep_time: int 515 529 540 545 600 558 600 600 600 600 ...
## $ dep_delay : num 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
## $ arr_time : int 830 850 923 1004 812 740 913 709 838 753 ...
## $ sched_arr_time: int 819 830 850 1022 837 728 854 723 846 745 ...
## $ arr_delay : num 11 20 33 -18 -25 12 19 -14 -8 8 ...
## $ carrier : chr "UA" "UA" "AA" "B6" ...
## $ flight : int 1545 1714 1141 725 461 1696 507 5708 79 301 ...
## $ tailnum : chr "N14228" "N24211" "N619AA" "N804JB" ...
## $ origin : chr "EWR" "LGA" "JFK" "JFK" ...
## $ dest : chr "IAH" "IAH" "MIA" "BQN" ...
## $ air_time : num 227 227 160 183 116 150 158 53 140 138 ...
## $ distance : num 1400 1416 1089 1576 762 ...
## $ hour : num 5 5 5 5 6 5 6 6 6 6 ...
## $ minute : num 15 29 40 45 0 58 0 0 0 0 ...
## $ time_hour : POSIXct, format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
## Classes 'tbl_df', 'tbl' and 'data.frame': 16 obs. of 2 variables:
## $ carrier: chr "9E" "AA" "AS" "B6" ...
## $ name : chr "Endeavor Air Inc." "American Airlines Inc." "Alaska Airlines Inc." "JetBlue Airways" ...
## Classes 'tbl_df', 'tbl' and 'data.frame': 26115 obs. of 15 variables:
## $ origin : chr "EWR" "EWR" "EWR" "EWR" ...
## $ year : num 2013 2013 2013 2013 2013 ...
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hour : int 1 2 3 4 5 6 7 8 9 10 ...
## $ temp : num 39 39 39 39.9 39 ...
## $ dewp : num 26.1 27 28 28 28 ...
## $ humid : num 59.4 61.6 64.4 62.2 64.4 ...
## $ wind_dir : num 270 250 240 250 260 240 240 250 260 260 ...
## $ wind_speed: num 10.36 8.06 11.51 12.66 12.66 ...
## $ wind_gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ precip : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pressure : num 1012 1012 1012 1012 1012 ...
## $ visib : num 10 10 10 10 10 10 10 10 10 10 ...
## $ time_hour : POSIXct, format: "2013-01-01 01:00:00" "2013-01-01 02:00:00" ...
## Classes 'tbl_df', 'tbl' and 'data.frame': 1458 obs. of 8 variables:
## $ faa : chr "04G" "06A" "06C" "06N" ...
## $ name : chr "Lansdowne Airport" "Moton Field Municipal Airport" "Schaumburg Regional" "Randall Airport" ...
## $ lat : num 41.1 32.5 42 41.4 31.1 ...
## $ lon : num -80.6 -85.7 -88.1 -74.4 -81.4 ...
## $ alt : int 1044 264 801 523 11 1593 730 492 1000 108 ...
## $ tz : num -5 -6 -6 -5 -5 -5 -5 -5 -5 -8 ...
## $ dst : chr "A" "A" "A" "A" ...
## $ tzone: chr "America/New_York" "America/Chicago" "America/Chicago" "America/New_York" ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_integer(),
## .. name = col_character(),
## .. city = col_character(),
## .. country = col_character(),
## .. faa = col_character(),
## .. icao = col_character(),
## .. lat = col_double(),
## .. lon = col_double(),
## .. alt = col_integer(),
## .. tz = col_double(),
## .. dst = col_character(),
## .. tzone = col_character()
## .. )
## Classes 'tbl_df', 'tbl' and 'data.frame': 3322 obs. of 9 variables:
## $ tailnum : chr "N10156" "N102UW" "N103US" "N104UW" ...
## $ year : int 2004 1998 1999 1999 2002 1999 1999 1999 1999 1999 ...
## $ type : chr "Fixed wing multi engine" "Fixed wing multi engine" "Fixed wing multi engine" "Fixed wing multi engine" ...
## $ manufacturer: chr "EMBRAER" "AIRBUS INDUSTRIE" "AIRBUS INDUSTRIE" "AIRBUS INDUSTRIE" ...
## $ model : chr "EMB-145XR" "A320-214" "A320-214" "A320-214" ...
## $ engines : int 2 2 2 2 2 2 2 2 2 2 ...
## $ seats : int 55 182 182 182 55 182 182 182 182 182 ...
## $ speed : int NA NA NA NA NA NA NA NA NA NA ...
## $ engine : chr "Turbo-fan" "Turbo-fan" "Turbo-fan" "Turbo-fan" ...
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
3 Example
3.1 Schema Card
dm_create_graph でカード(勝手に命名)を作成する
graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), columnArrows = F)
dm_render_graph(graph)3.2 Schema Card with primary keys
dm_add_references でprimary key情報(右の値)を付与する
rankdir=BTは Bottom -> Topの意味
dm_f <- dm_add_references(
dm_f,
flights$carrier == airlines$carrier,
flights$origin == airports$faa,
flights$dest == airports$faa,
flights$tailnum == planes$tailnum,
weather$origin == airports$faa
)
graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), edge_attrs = "dir = both, arrowtail = crow, arrowhead = odiamond", columnArrows = F)
dm_render_graph(graph)3.3 Colored Schema Card with primary keys
dm_set_display を通すことで色をつけることができる
display <- list(
accent1 = c("flights"),
accent2 = c("airlines"),
accent3 = c("weather"),
accent4 = c("airports"),
accent6 = c("planes")
)
dm_f <- dm_add_references(
dm_f,
flights$carrier == airlines$carrier,
flights$origin == airports$faa,
flights$dest == airports$faa,
flights$tailnum == planes$tailnum,
weather$origin == airports$faa
)
dm_f <- dm_set_display(dm_f, display)
graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), edge_attrs = "dir = both, arrowtail = crow, arrowhead = odiamond", columnArrows = F)
dm_render_graph(graph)3.4 Colored Schema Card with primary keys and segment
dm_set_segment でセグメントを入れれる
display <- list(
accent1 = c("flights"),
accent2 = c("airlines"),
accent3 = c("weather"),
accent4 = c("airports"),
accent6 = c("planes")
)
dm_f <- dm_add_references(
dm_f,
flights$carrier == airlines$carrier,
flights$origin == airports$faa,
flights$dest == airports$faa,
flights$tailnum == planes$tailnum,
weather$origin == airports$faa
)
dm_f <- dm_set_display(dm_f, display)
table_segments <- list(
A = c("flights", "airlines"),
B = c("weather"),
C = c("airports", "planes")
)
dm_f <- dm_set_segment(dm_f, table_segments)
graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), edge_attrs = "dir = both, arrowtail = crow, arrowhead = odiamond", columnArrows = F)
dm_render_graph(graph)3.5 Shiny
リアルタイムで作れるのもある。すご
# install.packages("shinyAce")
shiny::runApp(system.file("shiny", package = "datamodelr"))